Multimedia Jumpstart

home *** CD-ROM | disk | FTP | other *** search

/ Multimedia Jumpstart / Multimedia Microsoft Jumpstart Version 1.1a (Microsoft).BIN / develpmt / source / triq / 4d / math.asm < prev next >

Wrap

Assembly Source File | 1992-09-12 | 48.1 KB | 1,874 lines

page ,132 ;---------------------------Module-Header-------------------------------; ; Module Name: MATH.ASM ; ; Contains FIXED point math routines. ; ; Copyright (c) 1987 Microsoft Corporation ;-----------------------------------------------------------------------; ; (C) Copyright Microsoft Corp. 1987,1991. All rights reserved. ; ; You have a royalty-free right to use, modify, reproduce and ; distribute the Sample Files (and/or any modified version) in ; any way you find useful, provided that you agree that ; Microsoft has no warranty obligations or liability for any ; Sample Application Files which are modified. ?WIN = 0 ?PLM = 1 ?NODATA = 0 ?386 = 1 .286 .xlist include cmacros.inc .list UQUAD struc uq0 dw ? uq1 dw ? uq2 dw ? uq3 dw ? UQUAD ends PTL struc ptl_x dd ? ptl_y dd ? PTL ends POINTFX struc x dd ? y dd ? z dd ? w dd ? POINTFX ends MAXINT equ 7FFFh MININT equ 8000h ; The following two equates are just used as shorthand ; for the "word ptr" and "byte ptr" overrides. wptr equ word ptr bptr equ byte ptr ; The following structure should be used to access high and low ; words of a DWORD. This means that "word ptr foo[2]" -> "foo.hi". LONG struc lo dw ? hi dw ? LONG ends FARPOINTER struc off dw ? sel dw ? FARPOINTER ends assert macro a,b,c,d endm EAXtoDXAX macro shld edx,eax,16 ; move HIWORD(eax) to dx endm DXAXtoEAX macro ror eax,16 ; xchg HIWORD(eax) and LOWORD(eax) shrd eax,edx,16 ; move LOWORD(edx) to HIWORD(eax) endm createSeg MATH_TEXT,MathSeg,word,public,CODE createSeg TRIG_TEXT,TrigSeg,word,public,CODE ;---------------------------Public-Routine------------------------------; ; multiply ; ; ; ; Multiplies two 32 bit fixed point numbers. FIXED is a signed 32 bit ; ; number with 16 bits of integer and 16 bits of fraction. ; ; ; ; This routine is pretty small and very popular. We therefore put a ; ; copy in each of several segments. ; ; ; ; Entry: ; ; DX:AX = FIXED x ; ; CX:BX = FIXED y ; ; Returns: ; ; OF = 0 ; ; DX:AX = FIXED x*y ; ; BX = error term ; ; Error Returns: ; ; OF = 1 ; ; Registers destroyed: ; ; BX,CX ; ;-----------------------------------------------------------------------; ; Breaks the 32 bit multiply into four 16 bit multiplies. The key ; ; feature of this routine is the way that the 32 bit numbers are ; ; "SPLIT". This means that we break up the 32 bit signed numbers into ; ; two 16 bit signed numbers. After the breakup, the upper word, ; ; say X.1, is assumed to be the upper word of a 32 bit signed integer ; ; whose lower word is 0000. The lower word is assumed to be the lower ; ; 16 bits of a 32 bit signed word, THE UPPER WORD OF WHICH CAN BE ; ; GENERATED AS A SIGN EXTENSION OF THE LOWER WORD. As an example, ; ; consider what we do with the number 2.500: ; ; ; ; In FIXED notation: 2.500 = 00028000h ; ; ; ; We break it up as: = 00030000h + FFFF8000h = 3.000 - .500 ; ; ; ; So, in SPLIT notation: = 00038000h ; ; ; ; This allows the upper and lower words to each be treated as a signed ; ; 16 bit integer in their own right. Thus, we can use the signed ; ; multiply instruction IMUL. ; ;-----------------------------------------------------------------------; multiply_fixed macro local multiply_fixed_done ; make arg1 a SPLIT mov di,dx cwd sub di,dx mov si,ax ; SPLIT arg1 in DI:SI ; make arg2 a SPLIT mov ax,bx cwd sub cx,dx ; SPLIT arg2 in CX:BX ; AX BX CX DX SI DI ; -- y.0 y.1 -- x.0 x.1 ; start multiplying the high order words, this saves a register mov ax,cx ; Y.1 y.0 Y.1 -- x.0 X.1 ; X.1 * Y.1 imul di ; p.1 y.0 Y.1 -- x.0 X.1 jo multiply_fixed_done xchg ax,cx ; Y.1 y.0 p.1 -- x.0 X.1 ; x.0 * Y.1 imul si ; p.0 y.0 p.1 p.1 x.0 X.1 add cx,dx ; p.0 y.0 p.1 -- x.0 X.1 jo multiply_fixed_done xchg ax,di ; X.1 y.0 p.1 -- x.0 p.0 ; X.1 * y.0 imul bx ; p.0 y.0 p.1 p.1 x.0 p.0 add di,ax ; -- y.0 p.1 p.1 x.0 p.0 adc cx,dx ; -- y.0 p.1 -- x.0 p.0 jo multiply_fixed_done xchg ax,bx ; y.0 -- p.1 -- x.0 p.0 ; x.0 * y.0 imul si ; p.e -- p.1 p.0 x.0 p.0 xchg ax,bx ; -- p.e p.1 p.0 -- p.0 xchg ax,dx ; p.0 p.e p.1 -- -- p.0 cwd ; p.0 p.e p.1 cwd -- p.0 add ax,di ; p.0 p.e p.1 cwd -- -- adc dx,cx ; p.0 p.e -- p.1 -- -- multiply_fixed_done: endm sBegin MathSeg assumes cs,MathSeg assumes ds,nothing assumes es,nothing if ?386 .386 endif if ?386 cProc multiply,<FAR,PUBLIC> ParmD fx1 ParmD fx2 cBegin mov eax,fx1 imul fx2 ; result is in LOWORD(edx):HIWORD(eax) shr eax,16 cEnd else cProc multiply,<FAR,PUBLIC>,<si,di> ParmD fx1 ParmD fx2 cBegin mov ax,fx1.lo mov dx,fx1.hi mov bx,fx2.lo mov cx,fx2.hi call idmul ; dx:cx:bx:ax = dx:ax * cx:bx mov dx,cx ; dx:ax = dx:cx:bx:ax >> 16 mov ax,bx ;; multiply_fixed cEnd endif ;------------------------------ Public -------------------------------; ; fxRound ; ; Return the fixed number corresponding to the rounding of the input ; fixed number. ; ; Entry: ; Rhi,Rlo: registers containing the .hi and .lo portions ; of the fixed number. ; Returns: ; Rhi,Rlo: The answer is returned the the same registers ; Error Returns: ; none ; ;--------------------------------------------------------------------------; cProc fxRound, <FAR, PASCAL> ParmD fx cBegin mov ax,fx.lo mov dx,fx.hi add ax,ax adc dx,0 xor ax,ax cEnd ;---------------------------Public-Routine------------------------------; ; ulNormalize ; ; Normalizes a ULONG so that the highest order bit is 1. Returns the ; number of shifts done. Also returns ZF=1 if the ULONG was zero. ; ; Entry: ; DX:AX = ULONG ; Returns: ; DX:AX = normalized ULONG ; CX = shift count ; ZF = 1 if the ULONG is zero, 0 otherwise ; Registers Destroyed: ; none ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc ulNormalize,<PUBLIC,NEAR> cBegin ; shift by words xor cx,cx or dx,dx js ulNormalize_exit jnz top_word_ok xchg ax,dx or dx,dx jz ulNormalize_exit ; the zero exit mov cl,16 js ulNormalize_exit top_word_ok: ; shift by bytes or dh,dh jnz top_byte_ok xchg dh,dl xchg dl,ah xchg ah,al add cl,8 or dh,dh js ulNormalize_exit top_byte_ok: ; do the rest by bits inc cx add ax,ax adc dx,dx js ulNormalize_exit inc cx add ax,ax adc dx,dx js ulNormalize_exit inc cx add ax,ax adc dx,dx js ulNormalize_exit inc cx add ax,ax adc dx,dx js ulNormalize_exit inc cx add ax,ax adc dx,dx js ulNormalize_exit inc cx add ax,ax adc dx,dx js ulNormalize_exit inc cx add ax,ax adc dx,dx ulNormalize_exit: cEnd ;---------------------------Public-Routine------------------------------; ; far_idmul ; see idmul below. ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc far_idmul,<PUBLIC,FAR,NONWIN,NODATA> cBegin cCall idmul cEnd ;---------------------------Public-Routine------------------------------; ; idmul ; ; This is an extended precision multiply routine, intended to emulate ; 80386 imul instruction. ; ; Entry: ; DX:AX = LONG ; CX:BX = LONG ; Returns: ; DX:CX:BX:AX = QUAD product ; Registers Destroyed: ; none ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc idmul,<PUBLIC,NEAR>,<si,di> localQ qTemp cBegin ; put one argument in safe registers mov si,dx mov di,ax ; do the low order unsigned product mul bx mov qTemp.uq0,ax mov qTemp.uq1,dx ; do the high order signed product mov ax,si imul cx mov qTemp.uq2,ax mov qTemp.uq3,dx ; do a mixed product mov ax,si cwd and dx,bx sub qTemp.uq2,dx ; adjust for sign bit sbb qTemp.uq3,0 mul bx add qTemp.uq1,ax adc qTemp.uq2,dx adc qTemp.uq3,0 ; do the other mixed product mov ax,cx cwd and dx,di sub qTemp.uq2,dx sbb qTemp.uq3,0 mul di ; pick up the answer mov bx,ax mov cx,dx xor dx,dx mov ax,qTemp.uq0 add bx,qTemp.uq1 adc cx,qTemp.uq2 adc dx,qTemp.uq3 cEnd ;---------------------------Public-Routine------------------------------; ; far_dmul ; see dmul below. ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc far_dmul,<PUBLIC,FAR,NONWIN,NODATA> cBegin cCall dmul cEnd ;---------------------------Public-Routine------------------------------; ; dmul ; ; This is an extended precision multiply routine, intended to emulate ; 80386 mul instruction. ; ; Entry: ; DX:AX = LONG ; CX:BX = LONG ; Returns: ; DX:CX:BX:AX = QUAD product ; Registers Destroyed: ; none ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc dmul,<PUBLIC,NEAR>,<si,di> localQ qTemp cBegin ; put one argument in safe registers mov si,dx mov di,ax ; do the low order product mul bx mov qTemp.uq0,ax mov qTemp.uq1,dx ; do the high order product mov ax,si mul cx mov qTemp.uq2,ax mov qTemp.uq3,dx ; do a mixed product mov ax,si mul bx add qTemp.uq1,ax adc qTemp.uq2,dx adc qTemp.uq3,0 ; do the other mixed product mov ax,cx mul di ; pick up the answer mov bx,ax mov cx,dx xor dx,dx mov ax,qTemp.uq0 add bx,qTemp.uq1 adc cx,qTemp.uq2 adc dx,qTemp.uq3 cEnd ;---------------------------Public-Routine------------------------------; ; far_iqdiv ; see iqdiv below. ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc far_iqdiv,<PUBLIC,FAR,NONWIN,NODATA> cBegin cCall iqdiv cEnd ;---------------------------Public-Routine------------------------------; ; iqdiv ; ; This is an extended precision divide routine which is intended to ; emulate the 80386 64 bit/32 bit IDIV instruction. We don't have the ; 32 bit registers to work with, but we pack the arguments and results ; into what registers we do have. We will divide two signed numbers ; and return the quotient and remainder. We will do INT 0 for overflow, ; just like the 80386 microcode. This should ease conversion later. ; ; This routine just keeps track of the signs and calls qdiv to do the ; real work. ; ; Entry: ; DX:CX:BX:AX = QUAD Numerator ; SI:DI = LONG Denominator ; Returns: ; DX:AX = quotient ; CX:BX = remainder ; Registers Destroyed: ; DI,SI ; Wrote it. ;-----------------------------------------------------------------------; WIMP equ 1 IQDIV_RESULT_SIGN equ 1 IQDIV_REM_SIGN equ 2 assumes ds,nothing assumes es,nothing cProc iqdiv,<PUBLIC,NEAR> localB flags cBegin mov flags,0 ; take the absolute value of the denominator or si,si jns denominator_is_cool xor flags,IQDIV_RESULT_SIGN neg di adc si,0 neg si denominator_is_cool: ; take the absolute value of the denominator or dx,dx jns numerator_is_cool xor flags,IQDIV_RESULT_SIGN + IQDIV_REM_SIGN not ax not bx not cx not dx add ax,1 adc bx,0 adc cx,0 adc dx,0 numerator_is_cool: ; do the unsigned division call qdiv ifdef WIMP jo iqdiv_exit endif ; check for overflow or dx,dx jns have_a_bit_to_spare ifdef WIMP mov ax,8000h dec ah jmp short iqdiv_exit else int 0 ; You're toast, Jack! endif have_a_bit_to_spare: ; negate the result, if required test flags,IQDIV_RESULT_SIGN jz result_is_done neg ax adc dx,0 neg dx result_is_done: ; negate the remainder, if required test flags,IQDIV_REM_SIGN jz remainder_is_done neg bx adc cx,0 neg cx remainder_is_done: iqdiv_exit: cEnd ;---------------------------Public-Routine------------------------------; ; far_qdiv ; see qdiv below. ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc far_qdiv,<PUBLIC,FAR,NONWIN,NODATA> cBegin cCall qdiv cEnd ;---------------------------Public-Routine------------------------------; ; qdiv ; ; This is an extended precision divide routine which is intended to ; emulate the 80386 64 bit/32 bit DIV instruction. We don't have the ; 32 bit registers to work with, but we pack the arguments and results ; into what registers we do have. We will divide two unsigned numbers ; and return the quotient and remainder. We will do INT 0 for overflow, ; just like the 80386 microcode. This should ease conversion later. ; ; Entry: ; DX:CX:BX:AX = UQUAD Numerator ; SI:DI = ULONG Denominator ; Returns: ; DX:AX = quotient ; CX:BX = remainder ; Registers Destroyed: ; none ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc qdiv,<PUBLIC,NEAR>,<si,di> localQ uqNumerator localD ulDenominator localD ulQuotient localW cShift cBegin ; stuff the quad word into local memory mov uqNumerator.uq0,ax mov uqNumerator.uq1,bx mov uqNumerator.uq2,cx mov uqNumerator.uq3,dx ; check for overflow qdiv_restart: cmp si,dx ja qdiv_no_overflow jb qdiv_overflow cmp di,cx ja qdiv_no_overflow qdiv_overflow: ifdef WIMP mov ax,8000h dec ah jmp qdiv_exit else int 0 ; You're toast, Jack! jmp qdiv_restart endif qdiv_no_overflow: ; check for a zero Numerator or ax,bx or ax,cx or ax,dx jz qdiv_exit_relay ; quotient = remainder = 0 ; handle the special case when the denominator lives in the low word or si,si jnz not_that_special ; calculate (DX=0):CX:BX:uqNumerator.uq0 / (SI=0):DI cmp di,1 ; separate out the trivial case jz div_by_one xchg dx,cx ; CX = remainder.hi = 0 mov ax,bx div di mov bx,ax ; BX = quotient.hi mov ax,uqNumerator.uq0 div di ; AX = quotient.lo xchg bx,dx ; DX = quotient.hi, BX = remainder.lo ifdef WIMP or ax,ax ; clear OF endif qdiv_exit_relay: jmp qdiv_exit ; calculate (DX=0):(CX=0):BX:uqNumerator.uq0 / (SI=0):(DI=1) div_by_one: xchg dx,bx ; DX = quotient.hi, BX = remainder.lo = 0 mov ax,uqNumerator.uq0 ; AX = quotient.lo jmp qdiv_exit not_that_special: ; handle the special case when the denominator lives in the high word or di,di jnz not_this_special_either ; calculate DX:CX:BX:uqNumerator.uq0 / SI:(DI=0) cmp si,1 ; separate out the trivial case jz div_by_10000h mov ax,cx div si mov cx,ax ; CX = quotient.hi mov ax,bx div si ; AX = quotient.lo xchg cx,dx ; DX = quotient.hi, CX = remainder.hi mov bx,uqNumerator.uq0 ; BX = remainder.lo ifdef WIMP or ax,ax ; clear OF endif jmp qdiv_exit ; calculate (DX=0):CX:BX:uqNumerator.uq0 / (SI=1):(DI=0) div_by_10000h: xchg cx,dx ; DX = quotient.hi, CX = remainder.hi = 0 mov ax,bx ; AX = quotient.lo mov bx,uqNumerator.uq0 ; BX = remainder.lo jmp qdiv_exit not_this_special_either: ; normalize the denominator mov dx,si mov ax,di call ulNormalize ; DX:AX = normalized denominator mov cShift,cx ; CX < 16 mov ulDenominator.lo,ax mov ulDenominator.hi,dx ; shift the Numerator by the same amount jcxz numerator_is_shifted mov si,-1 shl si,cl not si ; SI = mask mov bx,uqNumerator.uq3 shl bx,cl mov ax,uqNumerator.uq2 rol ax,cl mov di,si and di,ax or bx,di mov uqNumerator.uq3,bx xor ax,di mov bx,uqNumerator.uq1 rol bx,cl mov di,si and di,bx or ax,di mov uqNumerator.uq2,ax xor bx,di mov ax,uqNumerator.uq0 rol ax,cl mov di,si and di,ax or bx,di mov uqNumerator.uq1,bx xor ax,di mov uqNumerator.uq0,ax numerator_is_shifted: ; set up registers for division mov dx,uqNumerator.uq3 mov ax,uqNumerator.uq2 mov di,uqNumerator.uq1 mov cx,ulDenominator.hi mov bx,ulDenominator.lo ; check for case when Denominator has only 16 bits or bx,bx jnz must_do_long_division div cx mov si,ax mov ax,uqNumerator.uq1 div cx xchg si,dx ; DX:AX = quotient mov di,uqNumerator.uq0 ; SI:DI = remainder (shifted) jmp short unshift_remainder must_do_long_division: ; do the long division, part IZ@NL@% cmp dx,cx ; we only know that DX:AX < CX:BX! jb first_division_is_safe mov ulQuotient.hi,0 ; i.e. 10000h, our guess is too big mov si,ax sub si,bx ; ... remainder is negative jmp short first_adjuster first_division_is_safe: div cx mov ulQuotient.hi,ax mov si,dx mul bx ; fix remainder for low order term sub di,ax sbb si,dx jnc first_adjuster_done ; The remainder is UNSIGNED! We have first_adjuster: ; to use the carry flag to keep track dec ulQuotient.hi ; of the sign. The adjuster loop add di,bx ; watches for a change to the carry adc si,cx ; flag which would indicate a sign jnc first_adjuster ; change IF we had more bits to keep first_adjuster_done: ; a sign in. ; do the long division, part II mov dx,si mov ax,di mov di,uqNumerator.uq0 cmp dx,cx ; we only know that DX:AX < CX:BX! jb second_division_is_safe mov ulQuotient.lo,0 ; i.e. 10000h, our guess is too big mov si,ax sub si,bx ; ... remainder is negative jmp short second_adjuster second_division_is_safe: div cx mov ulQuotient.lo,ax mov si,dx mul bx ; fix remainder for low order term sub di,ax sbb si,dx jnc second_adjuster_done second_adjuster: dec ulQuotient.lo add di,bx adc si,cx jnc second_adjuster second_adjuster_done: mov ax,ulQuotient.lo mov dx,ulQuotient.hi ; unshift the remainder in SI:DI unshift_remainder: mov cx,cShift jcxz remainder_unshifted mov bx,-1 shr bx,cl not bx shr di,cl ror si,cl and bx,si or di,bx xor si,bx remainder_unshifted: mov cx,si mov bx,di ifdef WIMP or ax,ax ; clear OF endif qdiv_exit: cEnd ;---------------------------Public-Routine------------------------------; ; divide ; ; Computes the FIXED quotient of two longs. Works by noting that all ; we want is 16 bits of fraction tacked onto the quotient. To get this, ; we multiply the numerator by 10000h, and let iqdiv do the rest. ; ; Entry: ; DX:AX = long or FIXED numerator ; CX:BX = long or FIXED denominator ; Returns: ; OF = 0 ; DX:AX = FIXED quotient ; Error Returns: ; OF = 1 ;!!! for now !!! ; Registers Destroyed: ; BX,CX ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing if 0; ?386 cProc divide,<PUBLIC,FAR,NODATA,NONWIN> ParmD fx1 ParmD fx2 cBegin mov eax,fx1 mov ebx,fx2 ; ebx = denominator xor edx,edx shld edx,eax,16 ; edx:eax = numerator * 10000h shl eax,16 idiv ebx EAXtoDXAX cEnd else cProc divide,<PUBLIC,FAR,NODATA,NONWIN>,<di,si> ParmD fx1 ParmD fx2 cBegin mov ax,fx1.lo mov dx,fx1.hi mov bx,fx2.lo mov cx,fx2.hi mov si,cx mov di,bx ; SI:DI = denominator mov bx,ax mov cx,dx mov ax,cx cwd xor ax,ax ; DX:CX:BX:AX = numerator * 10000h call iqdiv cEnd endif ;---------------------------Private-Macro-------------------------------; ; set_ov ; ; Sets the overflow flag ; ; Entry: ; reg = word register to use for scratch ; Returns: ; OF set ; Error Returns: ; none ; Registers Destroyed: ; reg ;-----------------------------------------------------------------------; set_ov macro reg mov reg,8000h dec reg endm ;---------------------------Public-Routine------------------------------; ; square_root ; ; Computes the Fixed square root of a Fixed. This algorithm ; comes from Knuth D.E; Metafont:The Program (Addison-Weseley, 1986) ; Part 8:Algebraic and Transcendental Functions. ; ; Notation used below: ; ; Entry: ; DX:AX = number to square root ; Returns: ; DX:AX = square root of input ; Error Returns: ; OF = 1 ; Registers Destroyed: ; BX,CX ; Registers Preserved: ; DS,ES,SI,DI ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc fxsqrt,<PUBLIC,FAR,NODATA,NONWIN>,<> ParmD fx cBegin mov ax, fx.lo mov dx, fx.hi cCall square_root mov dh,dl ; DX:AX *= sqrt(1000h) mov dl,ah mov ah,al xor al,al cEnd cProc ulsqrt,<PUBLIC,FAR,NODATA,NONWIN>,<> ParmD ul cBegin mov ax, ul.lo mov dx, ul.hi cCall square_root cEnd ; ; compute the square root of EDX:EAX ; ; retult returned in EAX and DX:AX ; ; We dont have a 64-bit square_root function ; so approximate it by dividing by 4 until we can use our ; 32-bit function ; cProc sqrt64,<PUBLIC,NEAR,NODATA,NONWIN>,<> cBegin xor cx,cx test_less_than_32: or edx,edx ; is it less than 32-bit? jz less_than_32 inc cx shrd eax,edx,2 ; divide by 4 shr edx,2 jmp short test_less_than_32 less_than_32: ;; ;; EDX:EAX is now less than 32 bits, CX contains count of ;; divisions by 4 we had to do to get here. call square_root ;; and then re-normalize ;; push cx EAXtoDXAX call square_root pop cx jcxz sqrt64_exit @@: shl ax,1 rcl dx,1 loop @b sqrt64_exit: DXAXtoEAX cEnd cProc square_root,<PUBLIC,NEAR,NODATA,NONWIN>,<SI,DI> ;; ParmD fx cBegin ;; mov ax, fx.lo ;; mov dx, fx.hi ; check for < 1 since this algorithm returns 0000:0001 or dx,dx jnz non_zero_arg cmp ax,1 ja non_zero_arg jmp square_root_exit negative_arg: ; can't have number negative xor ax,ax cwd set_ov bx jmp square_root_exit non_zero_arg: cCall ulNormalize ;;;;;;;;jcxz negative_arg jz exit_relay shr cx,1 jc add_shifts_only shr dx,1 rcr ax,1 dec cx add_shifts_only: ;; ;; use 23 for a FIXED square_root, and 15 for a ULONG square_root! ;; if 0 mov ch,23 ; FIXED square_root else mov ch,15 ; ULONG square_root endif sub ch,cl mov cl,8 sub ch,cl jg more_than_8 ; CL = Min(count,8) add cl,ch xor ch,ch ; CH = Max(count-8,0) more_than_8: xchg ax,si ; DI:SI = x mov di,dx xor ax,ax ; AX = y = lower bound = 0 mov bx,2 ; BX = q = estimate of root = 2 add si,si ; intiialize y = top bit of x adc di,di adc ax,ax first_8_loop: add si,si ; move 2 bits from top adc di,di ; of x to bottom of y adc ax,ax assert NC add si,si adc di,di adc ax,ax assert NC sub ax,bx ; AX = y - q jle y_neg_or_zero_8 ;!!!CR loops can be cleaned up some add bx,bx ; BX = 2q sub ax,bx ; AX = y - 3q jg y_greater_q_8 add ax,bx ; AX = y - q dec cl jnz first_8_loop jcxz first_8_was_enough jmp short done_with_1st_8 y_greater_q_8: add bx,2 ; BX = 2q + 2 (or 2q if jg not taken) dec cl jnz first_8_loop or ch,ch jnz done_with_1st_8 first_8_was_enough: xchg ax,bx ; DX:AX = q xor dx,dx shr ax,1 ; root = q >> 1 or ax,ax ; clear overflow flag exit_relay: jmp square_root_exit y_neg_or_zero_8: ; come here if y <= 0 dec bx add bx,bx ; BX = 2q - 2 add ax,bx ; AX = y + q - 2 dec cl jnz first_8_loop jcxz first_8_was_enough ;si is now empty, di contains all signifigant bits done_with_1st_8: mov cl,4 sub ch,cl jg second_4_loop ; CL = Min(count,4) add cl,ch xor ch,ch ; CH = Max(count-8,0) second_4_loop: ; move 2 bits from top add di,di ; of x to bottom of y adc ax,ax assert NC add di,di adc ax,ax assert NC sub ax,bx ; AX = y - q jle y_neg_or_zero_2nd_4 add bx,bx ; BX = 2q ;!!!CR use compare and jump rather than sub, branch, and restore sub ax,bx ; AX = y - 3q jg y_greater_q_2nd_4 add ax,bx ; AX = y - q dec cl jnz second_4_loop jcxz second_4_was_enough jmp short done_with_2nd_4 y_greater_q_2nd_4: add bx,2 ; BX = 2q + 2 (or 2q if jg not taken) dec cl jnz second_4_loop or ch,ch jnz done_with_2nd_4 second_4_was_enough: xchg ax,bx ; DX:AX = q xor dx,dx shr ax,1 ; root = q >> 1 or ax,ax ; clear overflow flag jmp square_root_exit y_neg_or_zero_2nd_4: ; come here if y <= 0 dec bx add bx,bx ; BX = 2q - 2 add ax,bx ; AX = y + q - 2 dec cl jnz second_4_loop jcxz second_4_was_enough done_with_2nd_4: xor dx,dx ; DX:AX = y SI:BX = q mov cl,4 sub ch,cl jg third_4_loop ; CL = Min(count,4) add cl,ch xor ch,ch ; CH = Max(count-13,0) third_4_loop: ;!!!CR Add comment noting that 32 bit math is being used now ; move 2 bits from top add di,di ; of x to bottom of y adc ax,ax adc dx,dx assert NC add di,di adc ax,ax adc dx,dx assert NC sub ax,bx ; DX:AX = y - q sbb dx,si js y_negative_3rd_4 jz y_might_be_zero_3rd_4 sorry_y_not_zero_3rd_4: add bx,bx adc si,si ; SI:BX = 2q sub ax,bx ; DX:AX = y - 3q sbb dx,si jg y_greater_q_3rd_4 jl y_less_or_equal_q_3rd_4 or ax,ax jnz y_greater_q_3rd_4 y_less_or_equal_q_3rd_4: add ax,bx ; DX:AX = y - q adc dx,si dec cl jnz third_4_loop jcxz third_4_was_enough jmp short done_with_3rd_4 y_greater_q_3rd_4: add bx,2 adc si,0 dec cl jnz third_4_loop or ch,ch jnz done_with_3rd_4 third_4_was_enough: xchg ax,bx ; DX:AX = q mov dx,si shr dx,1 ; root = q >> 1 rcr ax,1 or ax,ax ; clear overflow flag jmp square_root_exit ;;; short! y_might_be_zero_3rd_4: or ax,ax jnz sorry_y_not_zero_3rd_4 y_negative_3rd_4: ; come here if y <= 0 sub bx,1 sbb si,0 add bx,bx ; DL:BX = 2q - 2 adc si,si add ax,bx adc dx,si ; DH:AX = y + q - 2 dec cl jnz third_4_loop jcxz third_4_was_enough done_with_3rd_4: xchg ch,cl last_7_loop: add ax,ax adc dx,dx add ax,ax adc dx,dx sub ax,bx ; DX:AX = y - q sbb dx,si js y_negative_last_7 jz y_might_be_zero_last_7 ;!!!CR Clean up the exit so that a single exit point is used. sorry_y_not_zero_last_7: add bx,bx adc si,si ; SI:BX = 2q sub ax,bx ; DX:AX = y - 3q sbb dx,si jg y_greater_q_last_7 jl y_less_or_equal_q_last_7 or ax,ax jnz y_greater_q_last_7 y_less_or_equal_q_last_7: add ax,bx ; DX:AX = y - q adc dx,si loop last_7_loop xchg ax,bx ; DX:AX = q mov dx,si shr dx,1 ; root = q >> 1 rcr ax,1 or ax,ax ; clear overflow flag jmp short square_root_exit y_greater_q_last_7: add bx,2 adc si,0 loop last_7_loop xchg ax,bx ; DX:AX = q mov dx,si shr dx,1 ; root = q >> 1 rcr ax,1 or ax,ax ; clear overflow flag jmp short square_root_exit y_might_be_zero_last_7: or ax,ax jnz sorry_y_not_zero_last_7 y_negative_last_7: ; come here if y <= 0 sub bx,1 sbb si,0 add bx,bx ; DL:BX = 2q - 2 adc si,si add ax,bx adc dx,si ; DH:AX = y + q - 2 loop last_7_loop xchg ax,bx ; DX:AX = q mov dx,si shr dx,1 ; root = q >> 1 rcr ax,1 or ax,ax ; clear overflow flag square_root_exit: cEnd ;---------------------------Public-Routine------------------------------; ; QUAD far_big_cross_product(ptlA,ptlB) ; ; See big_cross_product ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc far_big_cross_product,<PUBLIC,FAR,NONWIN,NODATA> parmQ ptlA parmQ ptlB cBegin cCall big_cross_product,<ptlA,ptlB> cEnd ;---------------------------Public-Routine------------------------------; ; QUAD big_cross_product(ptlA,ptlB) ; ; Computes the cross product A B - A B with total accuracy. ; x y y x ; ; Answer is returned in DX:CX:BX:AX. ; ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc big_cross_product,<PUBLIC,NEAR> parmQ ptlA parmQ ptlB localQ Result cBegin ; first do A B ; y x mov ax,ptlA.ptl_y.lo mov dx,ptlA.ptl_y.hi mov bx,ptlB.ptl_x.lo mov cx,ptlB.ptl_x.hi call idmul mov Result.uq0,ax mov Result.uq1,bx mov Result.uq2,cx mov Result.uq3,dx ; now do A B ; x y mov ax,ptlA.ptl_x.lo mov dx,ptlA.ptl_x.hi mov bx,ptlB.ptl_y.lo mov cx,ptlB.ptl_y.hi call idmul sub ax,Result.uq0 sbb bx,Result.uq1 sbb cx,Result.uq2 sbb dx,Result.uq3 cEnd ;---------------------------Public-Routine------------------------------; ; QUAD far_big_dot_product(ptlA,ptlB) ; ; See big_dot_product ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc far_big_dot_product,<PUBLIC,FAR,NONWIN,NODATA> parmQ ptlA parmQ ptlB cBegin cCall big_dot_product,<ptlA,ptlB> cEnd ;---------------------------Public-Routine------------------------------; ; QUAD big_dot_product(ptlA,ptlB) ; ; Computes the dot product A B + A B with total accuracy. ; x x y y ; ; Answer is returned in DX:CX:BX:AX. ; ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc big_dot_product,<PUBLIC,NEAR> parmQ ptlA parmQ ptlB localQ Result cBegin ; first do A B ; x x mov ax,ptlA.ptl_x.lo mov dx,ptlA.ptl_x.hi mov bx,ptlB.ptl_x.lo mov cx,ptlB.ptl_x.hi call idmul mov Result.uq0,ax mov Result.uq1,bx mov Result.uq2,cx mov Result.uq3,dx ; now do A B ; y y mov ax,ptlA.ptl_y.lo mov dx,ptlA.ptl_y.hi mov bx,ptlB.ptl_y.lo mov cx,ptlB.ptl_y.hi call idmul add ax,Result.uq0 adc bx,Result.uq1 adc cx,Result.uq2 adc dx,Result.uq3 cEnd ;---------------------------Public-Routine------------------------------; ; ULONG lDot4D(ptA,ptB) ; ; Computes the dot product Ax Bx + Ay By + Az Bz with total accuracy. ; ; Answer is returned in DX:AX:CX. ; ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc lDot4D,<PUBLIC,FAR,PASCAL,NODATA,NONWIN> parmD Pw parmD Pz parmD Py parmD Px parmD Qw parmD Qz parmD Qy parmD Qx cBegin xor ebx,ebx ; accum result in EBX:ECX mov ecx,ebx irp xyz,<x,y,z> mov eax,P&xyz imul Q&xyz add ecx,eax adc ebx,edx endm ;; ;; return result in DX:AX:CX ;; shr ecx,16 mov eax,ebx EAXtoDXAX cEnd ;---------------------------Public-Routine------------------------------; ; fxCross4D(pvV,vP,vQ) ; ; Computes the cross product of vector vP and vector vQ ; ; result is stored in *pvV ; ; v.x = P.y*Q.z - P.z*Q.y ; v.y = -P.x*Q.z - P.z*Q.x ; v.z = P.x*Q.y - P.y*Q.x ; ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc fxCross4D,<PUBLIC,FAR,PASCAL,NODATA,NONWIN> parmD ppR parmD Pw parmD Pz parmD Py parmD Px parmD Qw parmD Qz parmD Qy parmD Qx cBegin les di,ppR ; ; v.x = P.y*Q.z - P.z*Q.y ; mov eax,Py mov edx,Qz imul edx mov ebx,edx mov ecx,eax mov eax,Pz mov edx,Qy imul edx sub ecx,eax sbb ebx,edx shrd ecx,ebx,16 mov es:[di].x,ecx ; ; v.y = -P.x*Q.z - P.z*Q.x ; mov eax,Px mov edx,Qz neg eax imul edx mov ebx,edx mov ecx,eax mov eax,Pz mov edx,Qx imul edx sub ecx,eax sbb ebx,edx shrd ecx,ebx,16 mov es:[di].y,ecx ; ; v.z = P.x*Q.y - P.y*Q.x ; mov eax,Px mov edx,Qy imul edx mov ebx,edx mov ecx,eax mov eax,Py mov edx,Qx imul edx sub ecx,eax sbb ebx,edx shrd ecx,ebx,16 mov es:[di].z,ecx xor eax,eax mov es:[di].w,eax cEnd ;---------------------------Public-Routine------------------------------; ; FIXED fxLength3D(Vx,Vy,Vz) ; ; Computes length of the vector V as a FIXED number ; ; Answer is returned in DX:AX:CX. ; ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc fxLength3D,<PUBLIC,FAR,PASCAL,NODATA,NONWIN> parmD Vx parmD Vy parmD Vz cBegin xor ebx,ebx ; accum sum of squares in EBX:ECX mov ecx,ebx irp xyz,<x,y,z> mov eax,V&xyz ; EAX = Vi imul eax ; EDX:EAX = Vi^2 add ecx,eax ; EBX:ECX += Vi^2 adc ebx,edx endm mov eax,ecx mov edx,ebx ;; ;; EDX:EAX now contains sum of squares*1000h, take the square_root cCall sqrt64 ; calculates sqrt(EDX:EAX) ==> DX:AX cEnd sEnd MathSeg .286 if ?386 .386 endif sBegin TrigSeg assumes cs,TrigSeg include vttable.inc ;----------------------------Public-Routine-----------------------------; ; CalcSine ; ; Computes sine(angle). ; ; Entry: ; DX:AX = angle UNSIGNED FIXED ; ; Returns: ; BX = 1 => OK. ; DX:AX = result. ; ; Error Returns: ; BX = 0. ; ; Registers Destroyed: ; CX, flags. ; ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc CalcSine,<FAR,PUBLIC> ParmD fx cBegin mov ax,fx.lo mov dx,fx.hi sub dx,90 jnc no_wrap add dx,360 no_wrap: push dx push ax ;;; errn$ CalcCosine call FAR PTR CalcCosine cEnd ;----------------------------Public-Routine-----------------------------; ; CalcCosine ; ; Computes Cosine(angle). ; ; Entry: ; DX:AX = angle UNSIGNED FIXED ; ; Returns: ; BX = 1 => OK. ; DX:AX = result. ; ; Error Returns: ; BX = 0. ; ; Registers Destroyed: ; CX, flags. ; ;-----------------------------------------------------------------------; ;----------------------------Pseudo-Code--------------------------------; ; INTEGER FAR PASCAL CalcCosine(FIXED) ; short R; ; short Angle; ; { ; negateflag = 0; // don't want result negated ; ; AX = Angle; ; AX -= 900; ; ; if (Angle <= 900) ; { ; jump to negate_cos_result; // return RTable(R, 900 - Angle) ; } ; ; negateflag = -1; // want result negated ; ; if (Angle <= 1800) ; { ; jump to no_cos_negation; // return -RTable(R, Angle - 900) ; } ; ; AX -= 1800; ; ; if (Angle <= 2700) ; { ; jump to negate_cos_result; // return -RTable(R, 2700 - Angle) ; } ; ; negateflag = 0; // dont want result negated ; ; jump to no_cos_negation; // return RTable(R, Angle - 2700) ; ;negate_cos_result: ; negate AX; ; ;no_cos_negation: ; jump to RTable; ; } ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc CalcCosine,<FAR,PUBLIC> ParmD fx cBegin mov ax,fx.lo mov dx,fx.hi xor cx,cx ;Don't want result negated sub dx,90 ; if angle <= 90 jg greater_than_90 jl negate_cos_result ; return r_table(R,90 - angle) or ax,ax jz negate_cos_result greater_than_90: dec cx ;Want result negated mov bx,dx ;If Angle <= 180 sub bx,90 ; return -RTable(R,Angle-90) jg greater_than_180 jl no_cos_negation or ax,ax jz no_cos_negation greater_than_180: sub dx,180 ;If Angle < 270 jl negate_cos_result ; return -RTable(R,270-Angle) inc cx ;Don't want result negated jmp short no_cos_negation ; return RTable(R,Angle-270) negate_cos_result: neg ax ; do the negation. adc dx,0 neg dx no_cos_negation: push cx ; Save negation flag push di ; Interpolation mov di,dx imul di,10 shl di,1 ; make it a word index. mov dx,cs:vttable[di][0] ; Get base value if 0 push dx ; save base value. mov cx,cs:vttable[di][2] ; find (difference between adjacent sub cx,dx xor dx,dx xor bx,bx push dx push ax push cx push bx call multiply ; call far_multiply pop ax add dx,ax endif xor ax,ax mov cx,10000 xor bx,bx push dx push ax push cx push bx call divide pop di pop cx ; Negate result if needed jcxz r_table_exit neg ax ; do the negation. adc dx,0 neg dx r_table_exit: cEnd sEnd TrigSeg end